{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Matching without replacement"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 0. Import libraries "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-05T13:37:59.927516500Z",
     "start_time": "2024-03-05T13:37:55.056727900Z"
    }
   },
   "outputs": [],
   "source": [
    "from lightautoml.addons.hypex import Matcher"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Create or upload your dataset  \n",
    "In this case we will create random dataset with known effect size  \n",
    "If you have your own dataset, go to the part 2 \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-05T13:37:59.952659200Z",
     "start_time": "2024-03-05T13:37:59.931623200Z"
    }
   },
   "outputs": [],
   "source": [
    "from lightautoml.addons.hypex.utils.tutorial_data_creation import create_test_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-05T13:38:18.298652Z",
     "start_time": "2024-03-05T13:37:59.946835700Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>signup_month</th>\n",
       "      <th>treat</th>\n",
       "      <th>pre_spends</th>\n",
       "      <th>post_spends</th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>industry</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>504.5</td>\n",
       "      <td>422.777778</td>\n",
       "      <td>NaN</td>\n",
       "      <td>F</td>\n",
       "      <td>Logistics</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>500.0</td>\n",
       "      <td>506.333333</td>\n",
       "      <td>51.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>E-commerce</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>485.0</td>\n",
       "      <td>434.000000</td>\n",
       "      <td>56.0</td>\n",
       "      <td>F</td>\n",
       "      <td>Logistics</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>452.0</td>\n",
       "      <td>468.111111</td>\n",
       "      <td>46.0</td>\n",
       "      <td>M</td>\n",
       "      <td>E-commerce</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>488.5</td>\n",
       "      <td>420.111111</td>\n",
       "      <td>56.0</td>\n",
       "      <td>M</td>\n",
       "      <td>Logistics</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9995</th>\n",
       "      <td>9995</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>482.0</td>\n",
       "      <td>501.666667</td>\n",
       "      <td>31.0</td>\n",
       "      <td>M</td>\n",
       "      <td>Logistics</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9996</th>\n",
       "      <td>9996</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>453.0</td>\n",
       "      <td>406.888889</td>\n",
       "      <td>53.0</td>\n",
       "      <td>M</td>\n",
       "      <td>Logistics</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9997</th>\n",
       "      <td>9997</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>461.0</td>\n",
       "      <td>415.111111</td>\n",
       "      <td>52.0</td>\n",
       "      <td>F</td>\n",
       "      <td>E-commerce</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9998</th>\n",
       "      <td>9998</td>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "      <td>491.5</td>\n",
       "      <td>439.222222</td>\n",
       "      <td>22.0</td>\n",
       "      <td>M</td>\n",
       "      <td>E-commerce</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999</th>\n",
       "      <td>9999</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>481.0</td>\n",
       "      <td>517.222222</td>\n",
       "      <td>53.0</td>\n",
       "      <td>M</td>\n",
       "      <td>E-commerce</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10000 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      user_id  signup_month  treat  pre_spends  post_spends   age gender  \\\n",
       "0           0             0      0       504.5   422.777778   NaN      F   \n",
       "1           1             4      1       500.0   506.333333  51.0    NaN   \n",
       "2           2             0      0       485.0   434.000000  56.0      F   \n",
       "3           3             8      1       452.0   468.111111  46.0      M   \n",
       "4           4             0      0       488.5   420.111111  56.0      M   \n",
       "...       ...           ...    ...         ...          ...   ...    ...   \n",
       "9995     9995             2      1       482.0   501.666667  31.0      M   \n",
       "9996     9996             0      0       453.0   406.888889  53.0      M   \n",
       "9997     9997             0      0       461.0   415.111111  52.0      F   \n",
       "9998     9998            10      1       491.5   439.222222  22.0      M   \n",
       "9999     9999             2      1       481.0   517.222222  53.0      M   \n",
       "\n",
       "        industry  \n",
       "0      Logistics  \n",
       "1     E-commerce  \n",
       "2      Logistics  \n",
       "3     E-commerce  \n",
       "4      Logistics  \n",
       "...          ...  \n",
       "9995   Logistics  \n",
       "9996   Logistics  \n",
       "9997  E-commerce  \n",
       "9998  E-commerce  \n",
       "9999  E-commerce  \n",
       "\n",
       "[10000 rows x 8 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = create_test_data(num_users=10000, rs=42, na_step=45, nan_cols=['age', 'gender'])\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-05T13:38:18.418303900Z",
     "start_time": "2024-03-05T13:38:18.298652Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['user_id', 'signup_month', 'treat', 'pre_spends', 'post_spends', 'age',\n",
       "       'gender', 'industry'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-05T13:38:18.477754900Z",
     "start_time": "2024-03-05T13:38:18.336440500Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "treat\n",
       "0    5002\n",
       "1    4998\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['treat'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-05T13:38:18.619416200Z",
     "start_time": "2024-03-05T13:38:18.362065400Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "223"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['gender'].isna().sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Matching  without replacement\n",
    "### 2.0 Init params\n",
    "info_col used to define informative attributes that should not be part of matching, such as user_id  \n",
    "But to explicitly store this column in the table, so that you can compare directly after computation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-05T13:38:18.634761700Z",
     "start_time": "2024-03-05T13:38:18.408068600Z"
    }
   },
   "outputs": [],
   "source": [
    "info_col = ['user_id']\n",
    "\n",
    "outcome = 'post_spends'\n",
    "treatment = 'treat'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.1 Matching\n",
    "This is the easiest way to initialize and calculate metrics on a Matching task  \n",
    "Use it when you are clear about each attribute or if you don't have any additional task conditions (Strict equality for certain features) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-05T13:38:18.660452500Z",
     "start_time": "2024-03-05T13:38:18.428454200Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[18.12.2024 22:04:52 | hypex | INFO]: Number of NaN values filled with zeros: 446\n"
     ]
    }
   ],
   "source": [
    "# Standard model with base parameters\n",
    "model = Matcher(input_data=df, outcome=outcome, treatment=treatment, info_col=info_col)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-05T13:38:30.559294800Z",
     "start_time": "2024-03-05T13:38:28.180655600Z"
    }
   },
   "outputs": [],
   "source": [
    "df_matched = model.match_no_rep()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>signup_month</th>\n",
       "      <th>treat</th>\n",
       "      <th>pre_spends</th>\n",
       "      <th>post_spends</th>\n",
       "      <th>age</th>\n",
       "      <th>gender_F</th>\n",
       "      <th>gender_M</th>\n",
       "      <th>industry_Logistics</th>\n",
       "      <th>user_id</th>\n",
       "      <th>signup_month_matched</th>\n",
       "      <th>treat_matched</th>\n",
       "      <th>pre_spends_matched</th>\n",
       "      <th>post_spends_matched</th>\n",
       "      <th>age_matched</th>\n",
       "      <th>gender_F_matched</th>\n",
       "      <th>gender_M_matched</th>\n",
       "      <th>industry_Logistics_matched</th>\n",
       "      <th>user_id_matched</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>504.5</td>\n",
       "      <td>422.777778</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>522.5</td>\n",
       "      <td>509.777778</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>4095</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>500.0</td>\n",
       "      <td>506.333333</td>\n",
       "      <td>51.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>488.0</td>\n",
       "      <td>420.333333</td>\n",
       "      <td>50.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3916</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>485.0</td>\n",
       "      <td>434.000000</td>\n",
       "      <td>56.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>492.5</td>\n",
       "      <td>510.777778</td>\n",
       "      <td>56.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>6057</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>452.0</td>\n",
       "      <td>468.111111</td>\n",
       "      <td>46.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>453.5</td>\n",
       "      <td>415.222222</td>\n",
       "      <td>42.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>5255</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>488.5</td>\n",
       "      <td>420.111111</td>\n",
       "      <td>56.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>488.0</td>\n",
       "      <td>472.111111</td>\n",
       "      <td>59.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>6874</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9991</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>482.0</td>\n",
       "      <td>501.666667</td>\n",
       "      <td>31.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>9995</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>474.0</td>\n",
       "      <td>435.111111</td>\n",
       "      <td>28.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>8397</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9992</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>453.0</td>\n",
       "      <td>406.888889</td>\n",
       "      <td>53.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>9996</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>459.0</td>\n",
       "      <td>518.777778</td>\n",
       "      <td>57.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>8416</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9993</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>461.0</td>\n",
       "      <td>415.111111</td>\n",
       "      <td>52.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9997</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>459.5</td>\n",
       "      <td>472.111111</td>\n",
       "      <td>51.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5727</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9994</th>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "      <td>491.5</td>\n",
       "      <td>439.222222</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>9998</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>492.5</td>\n",
       "      <td>411.555556</td>\n",
       "      <td>25.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>5675</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9995</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>481.0</td>\n",
       "      <td>517.222222</td>\n",
       "      <td>53.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>9999</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>471.5</td>\n",
       "      <td>427.000000</td>\n",
       "      <td>53.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>6361</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>9996 rows × 18 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      signup_month  treat  pre_spends  post_spends   age  gender_F  gender_M  \\\n",
       "0                0      0       504.5   422.777778   0.0         1         0   \n",
       "1                4      1       500.0   506.333333  51.0         0         0   \n",
       "2                0      0       485.0   434.000000  56.0         1         0   \n",
       "3                8      1       452.0   468.111111  46.0         0         1   \n",
       "4                0      0       488.5   420.111111  56.0         0         1   \n",
       "...            ...    ...         ...          ...   ...       ...       ...   \n",
       "9991             2      1       482.0   501.666667  31.0         0         1   \n",
       "9992             0      0       453.0   406.888889  53.0         0         1   \n",
       "9993             0      0       461.0   415.111111  52.0         1         0   \n",
       "9994            10      1       491.5   439.222222  22.0         0         1   \n",
       "9995             2      1       481.0   517.222222  53.0         0         1   \n",
       "\n",
       "      industry_Logistics  user_id  signup_month_matched  treat_matched  \\\n",
       "0                      1        0                     4              1   \n",
       "1                      0        1                     0              0   \n",
       "2                      1        2                     2              1   \n",
       "3                      0        3                     0              0   \n",
       "4                      1        4                     8              1   \n",
       "...                  ...      ...                   ...            ...   \n",
       "9991                   1     9995                     0              0   \n",
       "9992                   1     9996                     4              1   \n",
       "9993                   0     9997                     7              1   \n",
       "9994                   0     9998                     0              0   \n",
       "9995                   0     9999                     0              0   \n",
       "\n",
       "      pre_spends_matched  post_spends_matched  age_matched  gender_F_matched  \\\n",
       "0                  522.5           509.777778          0.0                 1   \n",
       "1                  488.0           420.333333         50.0                 0   \n",
       "2                  492.5           510.777778         56.0                 1   \n",
       "3                  453.5           415.222222         42.0                 0   \n",
       "4                  488.0           472.111111         59.0                 0   \n",
       "...                  ...                  ...          ...               ...   \n",
       "9991               474.0           435.111111         28.0                 0   \n",
       "9992               459.0           518.777778         57.0                 0   \n",
       "9993               459.5           472.111111         51.0                 1   \n",
       "9994               492.5           411.555556         25.0                 0   \n",
       "9995               471.5           427.000000         53.0                 0   \n",
       "\n",
       "      gender_M_matched  industry_Logistics_matched  user_id_matched  \n",
       "0                    0                           1             4095  \n",
       "1                    0                           0             3916  \n",
       "2                    0                           1             6057  \n",
       "3                    1                           0             5255  \n",
       "4                    1                           1             6874  \n",
       "...                ...                         ...              ...  \n",
       "9991                 1                           1             8397  \n",
       "9992                 0                           1             8416  \n",
       "9993                 0                           0             5727  \n",
       "9994                 1                           0             5675  \n",
       "9995                 1                           0             6361  \n",
       "\n",
       "[9996 rows x 18 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_matched"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
